Mapping Sites at Risk

Mapping Sites at Risk

First we need to ensure we have all of the data from the preliminary analysis again.

# First recreate the data from the preliminary analysis.
import geopandas as gpd
import pandas as pd

# Load the three GMCA land-supply layers (housing, industrial/warehousing,
# offices) and stack them into a single GeoDataFrame.
supply_paths = [
    "data/gmca_data/2024 GM Housing Land Supply GIS.shp",
    "data/gmca_data/2024 GM Industrial-warehousing Land Supply GIS.shp",
    "data/gmca_data/2024 GM Offices Land Supply GIS.shp",
]
total_supply_gdf = pd.concat(gpd.read_file(path) for path in supply_paths)

# Load the GMEU Sites of Biological Importance layer, tag every row with a
# category label, and align its column names with the GMCA data.
sbi_gdf = gpd.read_file("data/gmeu_data/gm_sbi.shp")
sbi_gdf["Category"] = "Site of Biological Importance"
sbi_gdf = sbi_gdf.rename(columns={"district": "LAName", "site_nam": "SiteRef"})

# Stack the supply sites and the SBIs into one combined GeoDataFrame,
# keeping only the columns the SBI layer shares with the supply data.
full_data_gdf = pd.concat(
    [total_supply_gdf, sbi_gdf[["SiteRef", "LAName", "Category", "geometry"]]]
)

# Use geopandas to get centroids of all the sites; centroid-to-centroid
# distance is used as the proxy for site-to-site distance.
# NOTE(review): the 1000 m threshold below assumes the data is in a projected,
# metre-based CRS (e.g. British National Grid) — confirm, since distances in a
# geographic (lat/lon) CRS would not be in metres.
full_data_gdf["centroid"] = full_data_gdf.centroid

# Split into Sites of Biological Importance and proposed developments.
sbi = full_data_gdf[full_data_gdf["Category"] == "Site of Biological Importance"]
non_sbi = full_data_gdf[full_data_gdf["Category"] != "Site of Biological Importance"]

# For each SBI, count the developments whose centroid lies within 1 km.
# GeoSeries.distance broadcasts one SBI centroid against every development at
# once, so the boolean sum replaces the original filter-and-count inner loop.
less_than_1km = [
    int((non_sbi["centroid"].distance(centroid) < 1000).sum())
    for centroid in sbi["centroid"]
]

# One row per SBI with its nearby-development count.
Dev_1km = pd.DataFrame(
    {"SiteRef": list(sbi["SiteRef"]), "No. Sites in 1km": less_than_1km}
)

# Add a scaled risk variable: bucket each SBI's nearby-development count into
# five ordered bands (0, 1-10, 11-20, 21-30, 31+). Bin edges are upper-limit
# inclusive. The top edge grows with the data so that counts above 40 are not
# silently dropped as NaN by pd.cut (the original hard-coded 40 would do that).
bins = [-1, 0, 10, 20, 30, max(40, Dev_1km["No. Sites in 1km"].max())]
labels = [0, 1, 2, 3, 4]
Dev_1km["Risk Scale"] = pd.cut(Dev_1km["No. Sites in 1km"], bins=bins, labels=labels)

Dev_1km
---------------------------------------------------------------------------
DataSourceError                           Traceback (most recent call last)
Cell In[1], line 6
      3 import pandas as pd
      5 # Load and join GMCA housing, industrial and office supply data
----> 6 housing_supply_gdf = gpd.read_file("data/gmca_data/2024 GM Housing Land Supply GIS.shp")
      7 industrial_supply_gdf = gpd.read_file("data/gmca_data/2024 GM Industrial-warehousing Land Supply GIS.shp")
      8 offices_supply_gdf = gpd.read_file("data/gmca_data/2024 GM Offices Land Supply GIS.shp")

File c:\ONSApps\My_Miniconda3\envs\mss-env\Lib\site-packages\geopandas\io\file.py:317, in _read_file(filename, bbox, mask, columns, rows, engine, **kwargs)
    314             filename = response.read()
    316 if engine == "pyogrio":
--> 317     return _read_file_pyogrio(
    318         filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
    319     )
    321 elif engine == "fiona":
    322     if pd.api.types.is_file_like(filename):

File c:\ONSApps\My_Miniconda3\envs\mss-env\Lib\site-packages\geopandas\io\file.py:577, in _read_file_pyogrio(path_or_bytes, bbox, mask, rows, **kwargs)
    568     warnings.warn(
    569         "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
    570         "will be removed in a future release. You can use the 'columns' keyword "
   (...)    573         stacklevel=3,
    574     )
    575     kwargs["columns"] = kwargs.pop("include_fields")
--> 577 return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)

File c:\ONSApps\My_Miniconda3\envs\mss-env\Lib\site-packages\pyogrio\geopandas.py:275, in read_dataframe(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)
    270 if not use_arrow:
    271     # For arrow, datetimes are read as is.
    272     # For numpy IO, datetimes are read as string values to preserve timezone info
    273     # as numpy does not directly support timezones.
    274     kwargs["datetime_as_string"] = True
--> 275 result = read_func(
    276     path_or_buffer,
    277     layer=layer,
    278     encoding=encoding,
    279     columns=columns,
    280     read_geometry=read_geometry,
    281     force_2d=gdal_force_2d,
    282     skip_features=skip_features,
    283     max_features=max_features,
    284     where=where,
    285     bbox=bbox,
    286     mask=mask,
    287     fids=fids,
    288     sql=sql,
    289     sql_dialect=sql_dialect,
    290     return_fids=fid_as_index,
    291     **kwargs,
    292 )
    294 if use_arrow:
    295     import pyarrow as pa

File c:\ONSApps\My_Miniconda3\envs\mss-env\Lib\site-packages\pyogrio\raw.py:198, in read(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)
     59 """Read OGR data source into numpy arrays.
     60 
     61 IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
   (...)    194 
    195 """
    196 dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
--> 198 return ogr_read(
    199     get_vsi_path_or_buffer(path_or_buffer),
    200     layer=layer,
    201     encoding=encoding,
    202     columns=columns,
    203     read_geometry=read_geometry,
    204     force_2d=force_2d,
    205     skip_features=skip_features,
    206     max_features=max_features or 0,
    207     where=where,
    208     bbox=bbox,
    209     mask=_mask_to_wkb(mask),
    210     fids=fids,
    211     sql=sql,
    212     sql_dialect=sql_dialect,
    213     return_fids=return_fids,
    214     dataset_kwargs=dataset_kwargs,
    215     datetime_as_string=datetime_as_string,
    216 )

File pyogrio\\_io.pyx:1293, in pyogrio._io.ogr_read()

File pyogrio\\_io.pyx:232, in pyogrio._io.ogr_open()

DataSourceError: data/gmca_data/2024 GM Housing Land Supply GIS.shp: No such file or directory

Next, map the SBIs along with their risk threshold using geopandas.

# First, here is a map of all the Sites of Biological Importance, coloured by
# category on an interactive CartoDB base layer.
sbi.explore("Category", cmap="tab10", tiles="CartoDB positron")
Make this Notebook Trusted to load map: File -> Trust Notebook
# Attach the risk scores to the SBI geometries so they can be mapped.
risk_locations = sbi.merge(Dev_1km, on="SiteRef")

# Colour each SBI by its risk band on an interactive base map.
risk_locations.explore("Risk Scale", cmap="YlOrRd", tiles="CartoDB positron")
Make this Notebook Trusted to load map: File -> Trust Notebook
# Alternate way of calculating risk: percentile-based thresholds.
import numpy as np

# Percentile ranks that define the band boundaries. The 100th percentile is
# the maximum observed count, so every row falls inside the top bin.
percentile_ranks = [20, 40, 60, 80, 90, 95, 100]

# Compute the count value at each percentile rank in one pass rather than
# seven near-identical statements. `bins` and `labels` keep their original
# names and values because the next cell reads them.
counts = Dev_1km["No. Sites in 1km"]
bins = [-1] + [int(np.percentile(counts, p)) for p in percentile_ranks]  # upper limit inclusive
labels = percentile_ranks

print(bins)
[-1, 1, 4, 7, 10, 14, 17, 38]
# Plotting the percentile scale on a map.
# NOTE(review): relies on `bins`/`labels` from the previous cell. pd.cut will
# raise if two adjacent percentile edges are equal (duplicate bin edges) —
# confirm the counts are spread enough for the chosen percentiles.
Dev_1km["Percentile Scale"] = pd.cut(
    Dev_1km["No. Sites in 1km"], bins=bins, labels=labels
)

# Re-join the geometries with the updated scores and map the percentile bands.
risk_locations = sbi.merge(Dev_1km, on="SiteRef")

risk_locations.explore("Percentile Scale", cmap="YlOrRd", tiles="CartoDB positron")
Make this Notebook Trusted to load map: File -> Trust Notebook